/*===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 */

#ifndef _h_vcf_reader_
#define _h_vcf_reader_

#ifndef _h_klib_defs_
#include <klib/defs.h>
#endif

#ifdef __cplusplus
extern "C" {
#endif

#include <klib/text.h>

/*--------------------------------------------------------------------------
 * forwards
 *  KFile and VNamelist are referenced only through pointers in this header,
 *  so incomplete declarations are sufficient here.
 */
struct KFile;
struct VNamelist;

/* VcfReader is declared in this header only as an incomplete type; it is
 * accessed through the VcfReader* functions declared in this header. */
typedef struct VcfReader VcfReader;

/* VcfDataLine
 *  One data line of a VCF file: the eight fixed fields followed by the
 *  genotype fields. Instances are handed out by VcfReaderGetDataLine.
 */
typedef struct VcfDataLine
{
    /* Fixed fields, per spec v. 4.2:
        http://www.1000genomes.org/wiki/analysis/variant-call-format/vcf-variant-call-format-version-42
    */
    String      chromosome; /* An identifier from the reference genome or "<ID>" pointing to a contig in the assembly file */
    uint32_t    position;   /* The reference position, with the 1st base having position 1 */
    String      id;         /* semicolon-separated list of unique identifiers where available; "." if missing */
    String      refBases;   /* reference base(s) */
    String      altBases;   /* comma-separated list of alternate non-reference alleles called on at least one of the samples; "." if missing */
    /* NOTE(review): uint8_t can only hold whole values 0..255, while the VCF
       spec appears to type QUAL as a general numeric value; confirm how the
       parser handles larger or fractional scores. */
    uint8_t     quality;    /* phred-scaled quality score for the assertion made in ALT */
    String      filter;     /* "PASS", or a semicolon-separated list of codes for filters that fail */
    String      info;       /* a semicolon-separated series of keys with optional values in the format: <key>[=<data>[,data]] */

    /* Genotype fields, each represented as a String pointing into source */
    struct VNamelist*  genotypeFields;

    uint16_t  lastPopulated; /* index of the last populated data item (parser's internal use) */
} VcfDataLine;

/*=============== VcfReaderMake ================*/

/* Make
 *  Creates a new instance of VcfReader
 *
 *  self [ OUT ] return parameter for the new reader object
 *
 *  Returns an rc_t result code.
 *
 *  NOTE(review): the new object is returned through a pointer-to-const,
 *  whereas VcfReaderParse and VcfReaderWhack take a non-const VcfReader*;
 *  callers presumably have to cast the const away. Confirm whether this is
 *  intended.
 */
rc_t VcfReaderMake( const VcfReader** self );

/* Parse
 *  Parses a VCF file. Can be used repeatedly on the same object.
 *
 *  self [ IN ] the reader object
 *
 *  file [ IN ] a readable file object. The entire file will be parsed.
 *
 *  messages [ OUT ] error messages generated by the parser. Set to NULL if
 *  no messages were generated. The pointer is valid until the next call to
 *  VcfReaderParse or VcfReaderWhack on the reader object.
 *
 *  Returns an rc_t result code.
 */
rc_t VcfReaderParse( VcfReader* self, struct KFile* file, const struct VNamelist** messages );

/* Whack
 *  Releases the object obtained from VcfReaderMake.
 *
 *  self [ IN ] the reader object to release
 *
 *  Per the notes on VcfReaderParse and VcfReaderGetDataLine, the message
 *  list and data line pointers previously returned by this reader are not
 *  valid after this call.
 *
 *  Returns an rc_t result code.
 */
rc_t VcfReaderWhack( VcfReader* self );

/* GetDataLineCount
 *  Returns the number of data lines in the parsed file.
 *
 *  self [ IN ] the reader object
 *
 *  count [ OUT ] return parameter for the number of data lines
 *
 *  Returns an rc_t result code.
 */
rc_t VcfReaderGetDataLineCount( const VcfReader* self, uint32_t* count );

/* GetDataLine
 *  Returns the data line at the specified 0-based index.
 *
 *  self [ IN ] the reader object
 *
 *  index [ IN ] 0-based index of the requested data line
 *
 *  line [ OUT ] return parameter for the data line. The returned pointer is
 *  valid until the reader is whacked.
 *
 *  NOTE(review): VcfReaderParse may be called repeatedly on the same reader;
 *  confirm whether a subsequent parse also invalidates previously returned
 *  data lines.
 *
 *  Returns an rc_t result code.
 */
rc_t VcfReaderGetDataLine( const VcfReader* self, uint32_t index, const VcfDataLine** line );

#ifdef __cplusplus
}
#endif

#endif /* _h_vcf_reader_ */
